
## IMPORT

from pyspark.context import SparkContext, SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.context import SQLContext
from pyspark.sql.functions import *
from pyspark.sql.window import Window

## SPARK SESSION

# Reuse the active SparkContext if there is one, otherwise start one with a
# default SparkConf, and wrap it in a SparkSession for the DataFrame API.
sc = SparkContext.getOrCreate(conf=SparkConf())
spark = SparkSession(sc)
spark.sparkContext.setLogLevel('WARN')
# Checkpoint directory: required by the DataFrame.checkpoint() call below.
sc.setCheckpointDir("splink_sandbox/temp_graphframes/")

## JOIN geo_located ONTO final_long

geo_located = spark.read.parquet('data/geo_located')

fl = spark.read.parquet("data/final_long")

# Left join: every row of final_long is kept; columns from geo_located are
# null where no (component, cycle) match exists.
out = fl.join(geo_located, ['component', 'cycle'], 'left')

# The result is written back to the very path `fl` was read from. Spark reads
# lazily, so without materialising first the overwrite would either be
# rejected ("Cannot overwrite a path that is also being read from") or delete
# the input files before they have been read. An eager checkpoint writes the
# joined data to the checkpoint directory and cuts the lineage back to
# data/final_long, which makes the overwrite safe.
# NOTE(review): if the write itself fails part-way, data/final_long may be
# left incomplete; the checkpointed copy remains in the checkpoint directory.
out = out.checkpoint()

out.coalesce(100) \
    .write \
    .partitionBy('cycle') \
    .mode("overwrite") \
    .parquet("data/final_long")
